### Default knitr chunk options for the whole document: 6 x 4 figures written
### under 'Figs/', code echoed, messages and warnings left out of the output.
knitr::opts_chunk$set(
  fig.width  = 6,
  fig.height = 4,
  fig.path   = 'Figs/',
  echo       = TRUE,
  message    = FALSE,
  warning    = FALSE
)
library(tidyverse)
library(RColorBrewer)
library(stringr)
### Set up some options
### Sets the session-wide `stringsAsFactors` option to FALSE. Base functions
### that consult this option (e.g. data.frame() / read.csv() before R 4.0) then
### keep strings as character vectors instead of converting them to factors.
### NOTE(review): the data in this document is loaded with readr::read_csv(),
### which presumably does not depend on this option -- confirm before removing.
options(stringsAsFactors = FALSE) ### Ensure strings come in as character types
### Generic ggplot2 theme intended for every plot in this document.
###
### base_size: font size applied to all text elements (default 9); the plot
###            title is drawn at 1.25 times that size, bold and left-aligned.
###
### Returns a theme() object to be added to a ggplot with `+`.
ggtheme_plot <- function(base_size = 9) {
  nothing <- element_blank()

  ### Grey Helvetica text everywhere
  body_text  <- element_text(family = 'Helvetica', color = 'gray30', size = base_size)
  title_text <- element_text(size = rel(1.25), hjust = 0, face = 'bold')

  ### Faint major grid lines (swap in `nothing` to remove the grid entirely)
  major_grid <- element_line(colour = 'grey90', size = .25)

  ### Legend keys without border or background fill
  bare_key <- element_rect(colour = NA, fill = NA)

  theme(
    axis.ticks       = nothing,
    text             = body_text,
    plot.title       = title_text,
    panel.background = nothing,
    legend.position  = 'right',
    panel.border     = nothing,
    panel.grid.minor = nothing,
    panel.grid.major = major_grid,
    legend.key       = bare_key,
    ### Axis lines are switched off; the alternative considered was
    ### element_line(colour = "grey30", size = .5)
    axis.line        = nothing
  )
}
Plot a few parameters against each other
Parameters vs temperature
### Regress every non-temperature parameter on the per-date mean water
### temperature. A scatter plot with a linear fit is printed for each
### parameter whose adjusted R^2 exceeds R_thresh; the adjusted R^2 is
### reported for every parameter.
z <- read_csv('data/water_qual_student.csv')

R_thresh <- .1  ### minimum adjusted R^2 for a relationship to be plotted

### Predictor: mean water temperature for each sample date
temp_df <- z %>%
  filter(param_desc == 'WATER TEMPERATURE DEG C') %>%
  filter(!is.na(MeasureValue)) %>%
  group_by(SampleDate) %>%
  summarize(temp = mean(MeasureValue))

### Responses: every other parameter description in the data
params <- z$param_desc %>% unique() %>%
  .[!str_detect(., 'WATER TEMPERATURE DEG C')]

for (param in params) {
  ### param <- params[1]
  param_rows <- z %>%
    filter(param_desc %in% param)

  ### Short parameter name for the y-axis label. It has to be read before the
  ### select() below, which does not keep a `Parameter` column (reading it
  ### from `tmp` always gave NULL and a label of just " (unit)").
  ### NOTE(review): assumes the CSV has a `Parameter` column holding a short
  ### code -- TODO confirm; the full description is used when it is absent.
  plot_param_short <- param
  if ('Parameter' %in% names(param_rows)) {
    short_codes <- param_rows$Parameter[!is.na(param_rows$Parameter)]
    if (length(short_codes) > 0) {
      plot_param_short <- short_codes[1]
    }
  }

  tmp <- param_rows %>%
    inner_join(temp_df, by = 'SampleDate') %>%
    select(FIPS, place_name, EventId, SampleDate, param_desc, MeasureValue, temp, Unit) %>%
    distinct() %>%
    arrange(SampleDate)

  ### For large samples, trim the top 0.5% of values as outliers
  if (nrow(tmp) > 500) {
    tmp <- tmp %>%
      filter(MeasureValue < quantile(MeasureValue, .995, na.rm = TRUE))
  }

  ### lm() fails with no usable rows, and the adjusted R^2 is NaN with fewer
  ### than three, which would break the threshold test below.
  n_pairs <- sum(!is.na(tmp$MeasureValue) & !is.na(tmp$temp))
  if (n_pairs < 3) {
    cat(sprintf('<br>%s vs Temp:<br> too few paired observations (n = %d)<hr>', param, n_pairs))
    next
  }

  mdl_R <- lm(MeasureValue ~ temp, data = tmp) %>%
    summary()

  ### isTRUE() keeps a non-finite adjusted R^2 from aborting the loop
  if (isTRUE(mdl_R$adj.r.squared > R_thresh)) {
    plot_units <- tmp$Unit[!is.na(tmp$Unit)][1]

    param_plot <- ggplot(tmp, aes(x = temp, y = MeasureValue)) +
      geom_point(aes(color = place_name), alpha = .5) +
      stat_smooth(method = 'lm', color = 'grey20', size = .5) +
      labs(title = tools::toTitleCase(param),
           y = paste0(plot_param_short, ' (', plot_units, ')'),
           x = 'Temperature (°C)')

    print(param_plot)
  }

  cat(sprintf('<br>%s vs Temp:<br> R^2^ = %.4f<hr>', param, mdl_R$adj.r.squared))
}
WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L vs Temp:
$R^2 = 0.0197$

ACTIVE CHLOROPHYLL-A UG/L vs Temp:
$R^2 = 0.1386$

DISSOLVED OXYGEN IN MG/L MG/L vs Temp:
$R^2 = 0.5506$
HARDNESS AS CACO3 MG/L vs Temp:
$R^2 = 0.0063$
AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L vs Temp:
$R^2 = 0.0053$
NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Temp:
$R^2 = 0.0918$
NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Temp:
$R^2 = 0.0233$

NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Temp:
$R^2 = 0.1051$
PH CORRECTED FOR TEMPERATURE (25 DEG C) SU vs Temp:
$R^2 = 0.0036$
SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT vs Temp:
$R^2 = 0.0692$
SECCHI DEPTH M vs Temp:
$R^2 = 0.0140$
TOTAL ALKALINITY AS CACO3 MG/L vs Temp:
$R^2 = 0.0163$
TOTAL DISSOLVED NITROGEN MG/L vs Temp:
$R^2 = 0.0962$
TOTAL DISSOLVED PHOSPHORUS MG/L vs Temp:
$R^2 = 0.0035$
TOTAL SUSPENDED SOLIDS MG/L vs Temp:
$R^2 = 0.0060$
TURBIDITY; NEPHELOMETRIC METHOD NTU vs Temp:
$R^2 = 0.0096$
Parameters vs oxygen
### Regress every parameter other than dissolved oxygen on the per-date mean
### dissolved oxygen. A scatter plot with a linear fit is printed for each
### parameter whose adjusted R^2 exceeds R_thresh; the adjusted R^2 is
### reported for every parameter.
z <- read_csv('data/water_qual_student.csv')

R_thresh <- .1  ### minimum adjusted R^2 for a relationship to be plotted

### Predictor: mean dissolved oxygen for each sample date
o2_df <- z %>%
  filter(param_desc == 'DISSOLVED OXYGEN IN MG/L MG/L') %>%
  filter(!is.na(MeasureValue)) %>%
  group_by(SampleDate) %>%
  summarize(o2 = mean(MeasureValue))

### Responses: every parameter description not mentioning dissolved oxygen
params <- z$param_desc %>% unique() %>%
  .[!str_detect(., 'DISSOLVED OXYGEN')]

for (param in params) {
  ### param <- params[1]
  param_rows <- z %>%
    filter(param_desc %in% param)

  ### Short parameter name for the y-axis label. It has to be read before the
  ### select() below, which does not keep a `Parameter` column (reading it
  ### from `tmp` always gave NULL and a label of just " (unit)").
  ### NOTE(review): assumes the CSV has a `Parameter` column holding a short
  ### code -- TODO confirm; the full description is used when it is absent.
  plot_param_short <- param
  if ('Parameter' %in% names(param_rows)) {
    short_codes <- param_rows$Parameter[!is.na(param_rows$Parameter)]
    if (length(short_codes) > 0) {
      plot_param_short <- short_codes[1]
    }
  }

  tmp <- param_rows %>%
    inner_join(o2_df, by = 'SampleDate') %>%
    select(FIPS, place_name, EventId, SampleDate, param_desc, MeasureValue, o2, Unit) %>%
    distinct() %>%
    arrange(SampleDate)

  ### For large samples, trim the top 0.5% of values as outliers
  if (nrow(tmp) > 500) {
    tmp <- tmp %>%
      filter(MeasureValue < quantile(MeasureValue, .995, na.rm = TRUE))
  }

  ### lm() fails with no usable rows, and the adjusted R^2 is NaN with fewer
  ### than three, which would break the threshold test below.
  n_pairs <- sum(!is.na(tmp$MeasureValue) & !is.na(tmp$o2))
  if (n_pairs < 3) {
    cat(sprintf('<br>%s vs Dissolved Oxygen:<br> too few paired observations (n = %d)<hr>', param, n_pairs))
    next
  }

  mdl_R <- lm(MeasureValue ~ o2, data = tmp) %>%
    summary()

  ### isTRUE() keeps a non-finite adjusted R^2 from aborting the loop
  if (isTRUE(mdl_R$adj.r.squared > R_thresh)) {
    plot_units <- tmp$Unit[!is.na(tmp$Unit)][1]

    param_plot <- ggplot(tmp, aes(x = o2, y = MeasureValue)) +
      geom_point(aes(color = place_name), alpha = .5) +
      stat_smooth(method = 'lm', color = 'grey20', size = .5) +
      labs(title = tools::toTitleCase(param),
           y = paste0(plot_param_short, ' (', plot_units, ')'),
           x = 'Dissolved Oxygen (mg/L)')

    print(param_plot)
  }

  cat(sprintf('<br>%s vs Dissolved Oxygen:<br> R^2^ = %.4f<hr>', param, mdl_R$adj.r.squared))
}
WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L vs Dissolved Oxygen:
$R^2 = 0.0243$
ACTIVE CHLOROPHYLL-A UG/L vs Dissolved Oxygen:
$R^2 = 0.0699$
HARDNESS AS CACO3 MG/L vs Dissolved Oxygen:
$R^2 = 0.0346$
AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Oxygen:
$R^2 = 0.0025$

NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Oxygen:
$R^2 = 0.1068$
NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Oxygen:
$R^2 = 0.0581$

NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Oxygen:
$R^2 = 0.1074$
PH CORRECTED FOR TEMPERATURE (25 DEG C) SU vs Dissolved Oxygen:
$R^2 = 0.0017$
SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT vs Dissolved Oxygen:
$R^2 = 0.0614$
SECCHI DEPTH M vs Dissolved Oxygen:
$R^2 = 0.0381$
TOTAL ALKALINITY AS CACO3 MG/L vs Dissolved Oxygen:
$R^2 = 0.0035$
TOTAL DISSOLVED NITROGEN MG/L vs Dissolved Oxygen:
$R^2 = 0.0973$
TOTAL DISSOLVED PHOSPHORUS MG/L vs Dissolved Oxygen:
$R^2 = -0.0014$
TOTAL SUSPENDED SOLIDS MG/L vs Dissolved Oxygen:
$R^2 = 0.0026$
TURBIDITY; NEPHELOMETRIC METHOD NTU vs Dissolved Oxygen:
$R^2 = -0.0000$

WATER TEMPERATURE DEG C vs Dissolved Oxygen:
$R^2 = 0.6943$
Parameters vs nitrogen
### Regress every parameter other than dissolved nitrogen on the per-date mean
### dissolved nitrogen. A scatter plot with a linear fit is printed for each
### parameter whose adjusted R^2 exceeds R_thresh; the adjusted R^2 is
### reported for every parameter.
z <- read_csv('data/water_qual_student.csv')

R_thresh <- .1  ### minimum adjusted R^2 for a relationship to be plotted

### Predictor: mean dissolved nitrogen for each sample date
n2_df <- z %>%
  filter(str_detect(param_desc, 'DISSOLVED NITROGEN')) %>%
  filter(!is.na(MeasureValue)) %>%
  group_by(SampleDate) %>%
  summarize(n2 = mean(MeasureValue))

### Responses: every parameter description not mentioning dissolved nitrogen
params <- z$param_desc %>% unique() %>%
  .[!str_detect(., 'DISSOLVED NITROGEN')]

for (param in params) {
  ### param <- params[1]
  param_rows <- z %>%
    filter(param_desc %in% param)

  ### Short parameter name for the y-axis label. It has to be read before the
  ### select() below, which does not keep a `Parameter` column (reading it
  ### from `tmp` always gave NULL and a label of just " (unit)").
  ### NOTE(review): assumes the CSV has a `Parameter` column holding a short
  ### code -- TODO confirm; the full description is used when it is absent.
  plot_param_short <- param
  if ('Parameter' %in% names(param_rows)) {
    short_codes <- param_rows$Parameter[!is.na(param_rows$Parameter)]
    if (length(short_codes) > 0) {
      plot_param_short <- short_codes[1]
    }
  }

  tmp <- param_rows %>%
    inner_join(n2_df, by = 'SampleDate') %>%
    select(FIPS, place_name, EventId, SampleDate, param_desc, MeasureValue, n2, Unit) %>%
    distinct() %>%
    arrange(SampleDate)

  ### For large samples, trim the top 0.5% of values as outliers
  if (nrow(tmp) > 500) {
    tmp <- tmp %>%
      filter(MeasureValue < quantile(MeasureValue, .995, na.rm = TRUE))
  }

  ### lm() fails with no usable rows, and the adjusted R^2 is NaN with fewer
  ### than three, which would break the threshold test below.
  n_pairs <- sum(!is.na(tmp$MeasureValue) & !is.na(tmp$n2))
  if (n_pairs < 3) {
    cat(sprintf('<br>%s vs Dissolved Nitrogen:<br> too few paired observations (n = %d)<hr>', param, n_pairs))
    next
  }

  mdl_R <- lm(MeasureValue ~ n2, data = tmp) %>%
    summary()

  ### isTRUE() keeps a non-finite adjusted R^2 from aborting the loop
  if (isTRUE(mdl_R$adj.r.squared > R_thresh)) {
    plot_units <- tmp$Unit[!is.na(tmp$Unit)][1]

    param_plot <- ggplot(tmp, aes(x = n2, y = MeasureValue)) +
      geom_point(aes(color = place_name), alpha = .5) +
      stat_smooth(method = 'lm', color = 'grey20', size = .5) +
      labs(title = tools::toTitleCase(param),
           y = paste0(plot_param_short, ' (', plot_units, ')'),
           x = 'Dissolved Nitrogen (mg/L)')

    print(param_plot)
  }

  cat(sprintf('<br>%s vs Dissolved Nitrogen:<br> R^2^ = %.4f<hr>', param, mdl_R$adj.r.squared))
}
WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L vs Dissolved Nitrogen:
$R^2 = 0.0070$
ACTIVE CHLOROPHYLL-A UG/L vs Dissolved Nitrogen:
$R^2 = 0.0066$

DISSOLVED OXYGEN IN MG/L MG/L vs Dissolved Nitrogen:
$R^2 = 0.1415$
HARDNESS AS CACO3 MG/L vs Dissolved Nitrogen:
$R^2 = -0.0040$
AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Nitrogen:
$R^2 = 0.0767$

NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Nitrogen:
$R^2 = 0.3626$
NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Nitrogen:
$R^2 = 0.0367$

NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Nitrogen:
$R^2 = 0.3513$
PH CORRECTED FOR TEMPERATURE (25 DEG C) SU vs Dissolved Nitrogen:
$R^2 = 0.0198$
SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT vs Dissolved Nitrogen:
$R^2 = 0.0590$
SECCHI DEPTH M vs Dissolved Nitrogen:
$R^2 = 0.0143$
TOTAL ALKALINITY AS CACO3 MG/L vs Dissolved Nitrogen:
$R^2 = 0.0254$
TOTAL DISSOLVED PHOSPHORUS MG/L vs Dissolved Nitrogen:
$R^2 = 0.0177$
TOTAL SUSPENDED SOLIDS MG/L vs Dissolved Nitrogen:
$R^2 = 0.0201$
TURBIDITY; NEPHELOMETRIC METHOD NTU vs Dissolved Nitrogen:
$R^2 = 0.0268$

WATER TEMPERATURE DEG C vs Dissolved Nitrogen:
$R^2 = 0.1974$
Parameters vs phosphorus
### Regress every parameter other than dissolved phosphorus on the per-date
### mean dissolved phosphorus. A scatter plot with a linear fit is printed for
### each parameter whose adjusted R^2 exceeds R_thresh; the adjusted R^2 is
### reported for every parameter.
z <- read_csv('data/water_qual_student.csv')

R_thresh <- .1  ### minimum adjusted R^2 for a relationship to be plotted

### Predictor: mean dissolved phosphorus for each sample date, with the top 1%
### of daily means removed
p_df <- z %>%
  filter(str_detect(param_desc, 'DISSOLVED PHOSPHORUS')) %>%
  filter(!is.na(MeasureValue)) %>%
  group_by(SampleDate) %>%
  summarize(p = mean(MeasureValue)) %>%
  filter(p < quantile(p, .99))

### Responses: every parameter description not mentioning dissolved phosphorus
params <- z$param_desc %>% unique() %>%
  .[!str_detect(., 'DISSOLVED PHOSPHORUS')]

for (param in params) {
  ### param <- params[1]
  param_rows <- z %>%
    filter(param_desc %in% param)

  ### Short parameter name for the y-axis label. It has to be read before the
  ### select() below, which does not keep a `Parameter` column (reading it
  ### from `tmp` always gave NULL and a label of just " (unit)").
  ### NOTE(review): assumes the CSV has a `Parameter` column holding a short
  ### code -- TODO confirm; the full description is used when it is absent.
  plot_param_short <- param
  if ('Parameter' %in% names(param_rows)) {
    short_codes <- param_rows$Parameter[!is.na(param_rows$Parameter)]
    if (length(short_codes) > 0) {
      plot_param_short <- short_codes[1]
    }
  }

  tmp <- param_rows %>%
    inner_join(p_df, by = 'SampleDate') %>%
    select(FIPS, place_name, EventId, SampleDate, param_desc, MeasureValue, p, Unit) %>%
    distinct() %>%
    arrange(SampleDate)

  ### For large samples, trim the top 0.5% of values as outliers
  if (nrow(tmp) > 500) {
    tmp <- tmp %>%
      filter(MeasureValue < quantile(MeasureValue, .995, na.rm = TRUE))
  }

  ### lm() fails with no usable rows, and the adjusted R^2 is NaN with fewer
  ### than three, which would break the threshold test below.
  n_pairs <- sum(!is.na(tmp$MeasureValue) & !is.na(tmp$p))
  if (n_pairs < 3) {
    cat(sprintf('<br>%s vs Dissolved Phosphorus:<br> too few paired observations (n = %d)<hr>', param, n_pairs))
    next
  }

  mdl_R <- lm(MeasureValue ~ p, data = tmp) %>%
    summary()

  ### isTRUE() keeps a non-finite adjusted R^2 from aborting the loop
  if (isTRUE(mdl_R$adj.r.squared > R_thresh)) {
    plot_units <- tmp$Unit[!is.na(tmp$Unit)][1]

    param_plot <- ggplot(tmp, aes(x = p, y = MeasureValue)) +
      geom_point(aes(color = place_name), alpha = .5) +
      stat_smooth(method = 'lm', color = 'grey20', size = .5) +
      labs(title = tools::toTitleCase(param),
           y = paste0(plot_param_short, ' (', plot_units, ')'),
           x = 'Dissolved Phosphorus (mg/L)')

    print(param_plot)
  }

  cat(sprintf('<br>%s vs Dissolved Phosphorus:<br> R^2^ = %.4f<hr>', param, mdl_R$adj.r.squared))
}

WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L vs Dissolved Phosphorus:
$R^2 = 0.1360$
ACTIVE CHLOROPHYLL-A UG/L vs Dissolved Phosphorus:
$R^2 = 0.0186$
DISSOLVED OXYGEN IN MG/L MG/L vs Dissolved Phosphorus:
$R^2 = 0.0029$
HARDNESS AS CACO3 MG/L vs Dissolved Phosphorus:
$R^2 = 0.0754$

AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Phosphorus:
$R^2 = 0.1170$
NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Phosphorus:
$R^2 = 0.0209$

NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Phosphorus:
$R^2 = 0.1398$
NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Dissolved Phosphorus:
$R^2 = 0.0251$
PH CORRECTED FOR TEMPERATURE (25 DEG C) SU vs Dissolved Phosphorus:
$R^2 = 0.0744$
SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT vs Dissolved Phosphorus:
$R^2 = 0.0078$
SECCHI DEPTH M vs Dissolved Phosphorus:
$R^2 = 0.0656$
TOTAL ALKALINITY AS CACO3 MG/L vs Dissolved Phosphorus:
$R^2 = -0.0021$
TOTAL DISSOLVED NITROGEN MG/L vs Dissolved Phosphorus:
$R^2 = 0.0256$

TOTAL SUSPENDED SOLIDS MG/L vs Dissolved Phosphorus:
$R^2 = 0.1065$

TURBIDITY; NEPHELOMETRIC METHOD NTU vs Dissolved Phosphorus:
$R^2 = 0.1935$
WATER TEMPERATURE DEG C vs Dissolved Phosphorus:
$R^2 = 0.0136$
Parameters vs turbidity
### Regress every parameter other than turbidity on the per-date mean
### turbidity. A scatter plot with a linear fit is printed for each parameter
### whose adjusted R^2 exceeds R_thresh; the adjusted R^2 is reported for
### every parameter.
z <- read_csv('data/water_qual_student.csv')

R_thresh <- .1  ### minimum adjusted R^2 for a relationship to be plotted

### Predictor: mean turbidity for each sample date, with the top 1% of daily
### means removed
turb_df <- z %>%
  filter(str_detect(param_desc, 'TURBIDITY')) %>%
  filter(!is.na(MeasureValue)) %>%
  group_by(SampleDate) %>%
  summarize(turb = mean(MeasureValue)) %>%
  filter(turb < quantile(turb, .99))

### Responses: every parameter description not mentioning turbidity
params <- z$param_desc %>% unique() %>%
  .[!str_detect(., 'TURBIDITY')]

for (param in params) {
  ### param <- params[1]
  param_rows <- z %>%
    filter(param_desc %in% param)

  ### Short parameter name for the y-axis label. It has to be read before the
  ### select() below, which does not keep a `Parameter` column (reading it
  ### from `tmp` always gave NULL and a label of just " (unit)").
  ### NOTE(review): assumes the CSV has a `Parameter` column holding a short
  ### code -- TODO confirm; the full description is used when it is absent.
  plot_param_short <- param
  if ('Parameter' %in% names(param_rows)) {
    short_codes <- param_rows$Parameter[!is.na(param_rows$Parameter)]
    if (length(short_codes) > 0) {
      plot_param_short <- short_codes[1]
    }
  }

  tmp <- param_rows %>%
    inner_join(turb_df, by = 'SampleDate') %>%
    select(FIPS, place_name, EventId, SampleDate, param_desc, MeasureValue, turb, Unit) %>%
    distinct() %>%
    arrange(SampleDate)

  ### For large samples, trim the top 0.5% of values as outliers
  if (nrow(tmp) > 500) {
    tmp <- tmp %>%
      filter(MeasureValue < quantile(MeasureValue, .995, na.rm = TRUE))
  }

  ### lm() fails with no usable rows, and the adjusted R^2 is NaN with fewer
  ### than three, which would break the threshold test below.
  n_pairs <- sum(!is.na(tmp$MeasureValue) & !is.na(tmp$turb))
  if (n_pairs < 3) {
    cat(sprintf('<br>%s vs Turbidity:<br> too few paired observations (n = %d)<hr>', param, n_pairs))
    next
  }

  mdl_R <- lm(MeasureValue ~ turb, data = tmp) %>%
    summary()

  ### isTRUE() keeps a non-finite adjusted R^2 from aborting the loop
  if (isTRUE(mdl_R$adj.r.squared > R_thresh)) {
    plot_units <- tmp$Unit[!is.na(tmp$Unit)][1]

    param_plot <- ggplot(tmp, aes(x = turb, y = MeasureValue)) +
      geom_point(aes(color = place_name), alpha = .5) +
      stat_smooth(method = 'lm', color = 'grey20', size = .5) +
      labs(title = tools::toTitleCase(param),
           y = paste0(plot_param_short, ' (', plot_units, ')'),
           x = 'Turbidity (NTU)')

    print(param_plot)
  }

  cat(sprintf('<br>%s vs Turbidity:<br> R^2^ = %.4f<hr>', param, mdl_R$adj.r.squared))
}
WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L vs Turbidity:
$R^2 = 0.0651$
ACTIVE CHLOROPHYLL-A UG/L vs Turbidity:
$R^2 = 0.0152$
DISSOLVED OXYGEN IN MG/L MG/L vs Turbidity:
$R^2 = -0.0002$
HARDNESS AS CACO3 MG/L vs Turbidity:
$R^2 = 0.0725$
AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L vs Turbidity:
$R^2 = 0.0074$
NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Turbidity:
$R^2 = 0.0277$
NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Turbidity:
$R^2 = 0.0030$
NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L vs Turbidity:
$R^2 = 0.0208$
PH CORRECTED FOR TEMPERATURE (25 DEG C) SU vs Turbidity:
$R^2 = 0.0385$
SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT vs Turbidity:
$R^2 = 0.0046$

SECCHI DEPTH M vs Turbidity:
$R^2 = 0.1311$

TOTAL ALKALINITY AS CACO3 MG/L vs Turbidity:
$R^2 = 0.1068$
TOTAL DISSOLVED NITROGEN MG/L vs Turbidity:
$R^2 = 0.0257$
TOTAL DISSOLVED PHOSPHORUS MG/L vs Turbidity:
$R^2 = 0.0502$

TOTAL SUSPENDED SOLIDS MG/L vs Turbidity:
$R^2 = 0.3017$
WATER TEMPERATURE DEG C vs Turbidity:
$R^2 = 0.0062$